// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "x86_decode.h"

namespace playground {

#if defined(__x86_64__) || defined(__i386__)
// Returns |ptr| as a writable "char *", matching the non-const out-parameters
// of next_inst(), which hand out pointers into the decoded instruction stream.
static inline char *as_char_ptr(const unsigned char *ptr) {
  return const_cast<char *>(reinterpret_cast<const char *>(ptr));
}

// Decodes the length of the single x86 instruction located at |*ip| and
// advances |*ip| to the first byte following it.
//
// Parameters:
//   ip         - in/out; on entry points at the first byte (prefix or opcode)
//                of the instruction, on return points just past it.
//   is64bit    - true to decode as 64 bit code (REX prefixes are recognized,
//                addresses default to 8 bytes); false for 32 bit code.
//   has_prefix - optional out; set to true if at least one legacy or REX
//                prefix byte preceded the opcode.
//   rex_ptr    - optional out; receives the address of the REX prefix that
//                applies to the instruction, or NULL if there is none.
//   mod_rm_ptr - optional out; receives the address of the ModR/M byte, or
//                NULL if the instruction has none.
//   sib_ptr    - optional out; receives the address of the SIB byte, or NULL
//                if the instruction has none.
//   is_group   - optional out; set to true if the opcode is one of the opcode
//                groups whose operation is selected by the "reg" field of the
//                ModR/M byte.
//
// Returns the opcode. One-byte opcodes are returned as-is; two-byte opcodes
// (escape byte 0x0F) are returned as 0x0F00 | second_byte.
//
// Instructions that are not present in the tables below (type 0) are not
// decoded any further: |*ip| is then only advanced past the prefixes and the
// opcode byte(s).
unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
                         char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
                         bool *is_group) {
  // Attribute bits stored in opcode_types[] and group_table[]. Bit 0 (0x01)
  // has no named flag; it appears to exist only so that supported opcodes
  // without any other attribute still have a non-zero type.
  enum {
    BYTE_OP      = (1<<1), // 0x02  operand (and thus immediate) is one byte
    IMM          = (1<<2), // 0x04  immediate of operand width follows
    IMM_BYTE     = (2<<2), // 0x08  one byte immediate follows
    MEM_ABS      = (3<<2), // 0x0C  absolute offset of address width follows
    MODE_MASK    = (7<<2), // 0x1C  mask for the three modes above
    MOD_RM       = (1<<5), // 0x20  ModR/M byte follows the opcode
    STACK        = (1<<6), // 0x40  stack operation; 64 bit wide in 64 bit mode
    GROUP        = (1<<7), // 0x80  opcode group; low bits index group_table[]
    GROUP_MASK   = 0x7F,
    GROUP_7      = 40,     // group_table[] offset of Group 7 (opcode 0x0F01)
  };

  // One entry per opcode: 0x00-0xFF for one-byte opcodes, followed by
  // 0x0F00-0x0FFF for two-byte opcodes. A value of 0 marks an opcode that
  // this decoder does not support.
  static const unsigned char opcode_types[512] = {
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00  -  0x07
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08  -  0x0F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10  -  0x17
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18  -  0x1F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20  -  0x27
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28  -  0x2F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30  -  0x37
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38  -  0x3F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40  -  0x47
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48  -  0x4F
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50  -  0x57
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58  -  0x5F
    0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60  -  0x67
    0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68  -  0x6F
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70  -  0x77
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78  -  0x7F
    0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80  -  0x87
    0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88  -  0x8F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90  -  0x97
    0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98  -  0x9F
    0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0  -  0xA7
    0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8  -  0xAF
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0  -  0xB7
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8  -  0xBF
    0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0  -  0xC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8  -  0xCF
    0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0  -  0xD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8  -  0xDF
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0  -  0xE7
    0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8  -  0xEF
    0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0  -  0xF7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8  -  0xFF
    0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
    0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
    0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
    // 0xF70 (PSHUFW/PSHUFD/PSHUFHW/PSHUFLW) is encoded "/r ib": the ModR/M
    // operand is followed by a one byte immediate.
    0x29, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
    0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
    0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
    0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
    0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
  };

  // Types for opcode groups, eight entries per group, indexed by the group
  // offset stored in opcode_types[] plus the "reg" field of the ModR/M byte.
  static const unsigned char group_table[56] = {
    0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
    0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
    0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
    0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
    0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
    0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
    0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
  };

  const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip);
  int operand_width = 4;
  int address_width = is64bit ? 8 : 4;
  unsigned char byte = 0;
  unsigned char rex = 0;
  bool found_prefix = false;
  if (rex_ptr) {
    *rex_ptr = 0;
  }
  if (mod_rm_ptr) {
    *mod_rm_ptr = 0;
  }
  if (sib_ptr) {
    *sib_ptr = 0;
  }

  // Consume all prefix bytes. On exit from the loop, |byte| holds the first
  // opcode byte and |insn_ptr| points just past it.
  for (;; ++insn_ptr) {
    switch (byte = *insn_ptr) {
      case 0x66: // Operand width prefix
        operand_width ^= 6;               // toggles between 4 and 2
        break;
      case 0x67: // Address width prefix
        address_width ^= is64bit ? 12 : 6; // toggles 8 <-> 4, or 4 <-> 2
        break;
      case 0x26: // Segment selector prefixes
      case 0x2e:
      case 0x36:
      case 0x3e:
      case 0x64:
      case 0x65:
      case 0xF0: // LOCK
      case 0xF2: // REPNE
      case 0xF3: // REP
        break;
      case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
      case 0x44: case 0x45: case 0x46: case 0x47:
      case 0x48: case 0x49: case 0x4A: case 0x4B:
      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
        if (is64bit) {
          if (rex_ptr) {
            *rex_ptr = as_char_ptr(insn_ptr);
          }
          rex = byte;
          found_prefix = true;
          continue;
        }
        // In 32 bit mode these bytes are the INC/DEC opcodes.
        // fall through
      default:
        ++insn_ptr;
        goto no_more_prefixes;
    }
    // We just consumed a legacy prefix. A REX prefix only takes effect when
    // it immediately precedes the opcode, so any REX seen earlier is void.
    // Keep the pointer reported to the caller in sync with the local state.
    rex = 0;
    if (rex_ptr) {
      *rex_ptr = 0;
    }
    found_prefix = true;
  }
no_more_prefixes:
  if (has_prefix) {
    *has_prefix = found_prefix;
  }
  if (rex & REX_W) {
    operand_width = 8;
  }

  // Look up the opcode type; 0x0F escapes to the second half of the table.
  unsigned short insn = byte;
  unsigned int idx = 0;
  if (byte == 0x0F) {
    byte = *insn_ptr++;
    insn = static_cast<unsigned short>((insn << 8) | byte);
    idx  = 256;
  }
  unsigned char type = opcode_types[idx + byte];

  bool found_group = false;
  if (type & GROUP) {
    found_group = true;
    // Only peek at the ModR/M byte here; it is consumed further below if the
    // selected group member is supported.
    const unsigned char group_mod_rm = *insn_ptr;
    if (mod_rm_ptr) {
      *mod_rm_ptr = as_char_ptr(insn_ptr);
    }
    unsigned char group = (type & GROUP_MASK) + ((group_mod_rm >> 3) & 0x7);
    if ((type & GROUP_MASK) == GROUP_7 && (group_mod_rm >> 6) == 3) {
      // Register forms of Group 7 use the alternate row of the table.
      group += 8;
    }
    type = group_table[group];
  }

  if (!type) {
    // We know that we still don't decode some of the more obscure
    // instructions, but for all practical purposes that doesn't matter.
    // Compilers are unlikely to output them, and even if we encounter
    // hand-coded assembly, we will soon synchronize to the instruction
    // stream again.
    //
    // std::cerr << "Unsupported instruction at 0x" << std::hex <<
    //     std::uppercase << reinterpret_cast<long>(*ip) << " [ ";
    // for (const unsigned char *ptr =
    //          reinterpret_cast<const unsigned char *>(*ip);
    //      ptr < insn_ptr; ) {
    //   std::cerr << std::hex << std::uppercase << std::setw(2) <<
    //       std::setfill('0') << (unsigned int)*ptr++ << ' ';
    // }
    // std::cerr << "]" << std::endl;
  } else {
    if (is64bit && (type & STACK)) {
      operand_width = 8;
    }
    if (type & MOD_RM) {
      if (mod_rm_ptr) {
        *mod_rm_ptr = as_char_ptr(insn_ptr);
      }
      const unsigned char mod_rm = *insn_ptr++;
      const int mod = (mod_rm >> 6) & 0x3;
      // Only the low three bits matter for determining the instruction
      // length. In particular, REX.B is not decoded for the "SIB follows"
      // (r/m == 4) and "disp32 only / RIP relative" (mod == 0, r/m == 5)
      // encodings, so it must not be folded into |rm|.
      const int rm  = mod_rm & 0x7;
      if (mod != 3) {
        if (address_width == 2) {
          // 16 bit addressing: no SIB byte, displacements are 0, 1 or 2 bytes.
          switch (mod) {
            case 0:
              if (rm != 6 /* disp16 only */) {
                break;
              }
              // fall through
            case 2:
              insn_ptr++;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        } else {
          if (rm == 4) {
            if (sib_ptr) {
              *sib_ptr = as_char_ptr(insn_ptr);
            }
            const unsigned char sib = *insn_ptr++;
            if (!mod && (sib & 0x7) == 5 /* no base, disp32 */) {
              insn_ptr += 4;
            }
          }
          switch (mod) {
            case 0:
              if (rm != 5 /* disp32 only / RIP relative */) {
                break;
              }
              // fall through
            case 2:
              insn_ptr += 3;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        }
      }
    }

    // Operands that cannot be expressed by the type bits.
    switch (insn) {
      case 0xC8: // ENTER (imm16 followed by imm8)
        insn_ptr++;
        // fall through
      case 0x9A: // CALL (far)
      case 0xC2: // RET (near)
      case 0xCA: // LRET
      case 0xEA: // JMP (far)
        insn_ptr += 2;
        break;
      case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
      case 0xF84: case 0xF85: case 0xF86: case 0xF87:
      case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
      case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
        // The displacement is at most 32 bits wide, even with REX.W.
        insn_ptr += (operand_width == 8) ? 4 : operand_width;
        break;
    }

    // Immediate operands and absolute memory offsets.
    switch (type & MODE_MASK) {
      case IMM:
        if (!(type & BYTE_OP)) {
          switch (insn) {
            case 0xB8: case 0xB9: case 0xBA: case 0xBB:
            case 0xBC: case 0xBD: case 0xBE: case 0xBF:
              // MOV reg, imm is the only instruction that accepts a full
              // 64 bit immediate.
              insn_ptr += operand_width;
              break;
            default:
              // All other immediates are capped at 32 bits.
              insn_ptr += (operand_width == 8) ? 4 : operand_width;
              break;
          }
          break;
        }
        // fall through
      case IMM_BYTE:
        insn_ptr++;
        break;
      case MEM_ABS:
        insn_ptr += address_width;
        break;
    }
  }
  if (is_group) {
    *is_group = found_group;
  }
  *ip = reinterpret_cast<const char *>(insn_ptr);
  return insn;
}
#endif

} // namespace
